In [1]:
# Star Wars Data Analysis
# 
# Exploring narrative patterns and storytelling elements in the Star Wars original trilogy using only the generated data.

import json
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import networkx as nx
from collections import Counter, defaultdict
import re
import os
from IPython.display import SVG, Image, display
from IPython import get_ipython
In [2]:
# Import our visualization functions
from star_wars_visualizations import (
    timeline_visualization,
    character_interaction_network,
    location_frequency,
    character_appearances,
    event_significance,
    quote_analysis,
    battle_analysis,
    force_user_analysis,
    ship_analysis,
    relationship_map,
    faction_distribution
)
In [3]:
# Import our extended visualization functions (these take the loaded data as an argument)
from star_wars_visualizations_extended import (
    save_and_show_fig,
    event_significance_by_episode,
    character_appearances_timeline,
    scene_context_analysis,
    character_co_occurrence,
    location_appearances_by_episode,
    character_network_by_episode,
    character_affiliation_analysis,
    quote_analysis_by_character,
    relationship_types_analysis,
    battle_analysis_through_trilogy
)
In [4]:
# Load the generated trilogy dataset.
# JSON text is UTF-8, so the encoding is pinned explicitly instead of relying
# on the platform's locale default (which differs between operating systems).
with open('./data/star_wars_trilogy.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Aliases for the top-level sections of the dataset, for easier reference.
# Every section below is looked up with [] and therefore raises KeyError when
# it is missing from the file; only 'droids' is treated as optional.
characters = data['characters']
jedis = data['jedis']
siths = data['siths']
droids = data.get('droids', {})  # optional section: empty dict when absent
planets = data['planets']
spaceships = data['spaceships']
events = data['events']
battles = data['battles']
relationships = data['relationships']
timelines = data['timelines']
In [5]:
# Summarise the loaded dataset; the event count adds up the length of every entry in `timelines`.
print(f"Dataset loaded with {len(characters)} characters, {len(planets)} planets, and {sum(map(len, timelines.values()))} events across 3 films.")
Dataset loaded with 44 characters, 23 planets, and 160 events across 3 films.
In [6]:
# Check whether the notebook is executed by a Jupyter/Colab kernel or by some
# other front end, to choose between interactive figures and saved images.

ipython_shell = get_ipython()
shell = ipython_shell.__class__.__name__
shell_module = ipython_shell.__class__.__module__
use_SVG = True
print(shell)

# A Jupyter kernel reports the class name 'ZMQInteractiveShell'.
# Colab's shell class is named 'Shell' and lives in the 'google.colab._shell'
# module, so comparing the class *name* against 'google.colab._shell' can never
# match; Colab is therefore recognised through the class's module instead.
# The original name comparison is kept so previously accepted values still pass.
show_interactive_fig = (
    shell in ('ZMQInteractiveShell', 'google.colab._shell')
    or shell_module.startswith('google.colab')
)

print("interative visualizations: ",show_interactive_fig)
ZMQInteractiveShell
interative visualizations:  True
In [7]:
# test image display
# show_interactive_fig = False
In [8]:
def show_fig_or_image(fig):
    """Display a figure interactively, or show its pre-rendered image file.

    The image base name is the figure's title with spaces replaced by
    underscores. Behaviour depends on two notebook-level flags:

    * ``show_interactive_fig`` true: call ``fig.show`` and configure its
      "download image" button to export an SVG using that base name.
    * ``show_interactive_fig`` false: display ``./images/<base name>.svg``
      when ``use_SVG`` is true, otherwise ``./images/<base name>.png``.

    Args:
        fig: Figure object exposing ``layout.title.text`` and ``show(config=...)``.

    Returns:
        None. The figure or image is rendered as a side effect.
    """
    title_text = fig.layout.title.text
    # A figure without a title has no text to derive a name from; fall back to
    # a generic base name instead of failing on None.replace().
    image_filename = (title_text or 'figure').replace(' ', '_')

    if show_interactive_fig:
        fig.show(config={'toImageButtonOptions': {'format': 'svg', 'filename': image_filename}})
        return

    image_path_stem = './images/' + image_filename
    if use_SVG:
        image = SVG(filename=image_path_stem + '.svg')
    else:
        # IPython's Image cannot embed an SVG file (it raises ValueError), so
        # the raster fallback reads a PNG instead.
        # NOTE(review): assumes a PNG export with the same base name exists in
        # ./images/ — confirm against the code that saves the figures.
        image = Image(filename=image_path_stem + '.png')
    display(image)
In [9]:
# ## 1. Timeline Visualization
# Flow of events along the timeline of the Star Wars original trilogy.
# NOTE: the name `fig1` is assigned again by a later cell of this notebook.

fig1 = timeline_visualization()
show_fig_or_image(fig1)  # interactive figure or saved image, per show_interactive_fig
In [10]:
# ## 2. Character Interaction Network
# Network graph of which characters interact with each other the most over the trilogy.
# NOTE: the name `fig2` is assigned again by a later cell of this notebook.

fig2 = character_interaction_network()
show_fig_or_image(fig2)  # interactive figure or saved image, per show_interactive_fig

1. Event Significance by Episode¶

In [11]:
# How the significance of events flows through each episode.
# NOTE: `fig1` was already assigned by the timeline cell; this rebinds it.

fig1 = event_significance_by_episode(data)
show_fig_or_image(fig1)  # interactive figure or saved image, per show_interactive_fig
In [12]:
# How can we extract the title of a Plotly figure?
# dir(fig1.layout.title.text)

2. Character Appearances Timeline¶

In [13]:
# When the major characters appear over the course of the trilogy.
# NOTE: `fig2` was already assigned by the interaction-network cell; this rebinds it.

fig2 = character_appearances_timeline(data)
show_fig_or_image(fig2)  # interactive figure or saved image, per show_interactive_fig

3. Scene Context Analysis¶

In [14]:
# Which types of scenes (battles, dialogues, revelations) occur in each episode.
# NOTE: the name `fig3` is assigned again by a later cell of this notebook.

fig3 = scene_context_analysis(data)
show_fig_or_image(fig3)  # interactive figure or saved image, per show_interactive_fig

4. Character Co-occurrence Analysis¶

In [15]:
# Heatmap of the characters that most frequently appear together.
# NOTE: the name `fig4` is assigned again by a later cell of this notebook.

fig4 = character_co_occurrence(data)
show_fig_or_image(fig4)  # interactive figure or saved image, per show_interactive_fig

5. Location Appearances by Episode¶

In [16]:
# Treemap of where scenes take place across the trilogy.
# NOTE: the name `fig5` is assigned again by a later cell of this notebook.

fig5 = location_appearances_by_episode(data)
show_fig_or_image(fig5)  # interactive figure or saved image, per show_interactive_fig

6. Character Network By Episode¶

In [17]:
# How the character networks evolve across the three films.
# NOTE: the name `fig6` is assigned again by a later cell of this notebook.

fig6 = character_network_by_episode(data)
show_fig_or_image(fig6)  # interactive figure or saved image, per show_interactive_fig

7. Character Affiliation Analysis¶

In [18]:
# Balance of power between the different factions.
# NOTE: the name `fig7` is assigned again by later cells of this notebook.

fig7 = character_affiliation_analysis(data)
show_fig_or_image(fig7)  # interactive figure or saved image, per show_interactive_fig

8. Quote Analysis by Character¶

In [19]:
# Which characters have the most memorable quotes.
# NOTE: the name `fig8` is assigned again by later cells of this notebook.

fig8 = quote_analysis_by_character(data)
show_fig_or_image(fig8)  # interactive figure or saved image, per show_interactive_fig
In [20]:
# ## 3. Location Frequency Analysis
# Which locations appear most frequently in the trilogy.
# NOTE: `fig3` was already assigned by the scene-context cell; this rebinds it.

fig3 = location_frequency()
show_fig_or_image(fig3)  # interactive figure or saved image, per show_interactive_fig
In [21]:
# ## 4. Character Appearances by Episode
# How frequently the major characters appear in each episode.
# NOTE: `fig4` was already assigned by the co-occurrence cell; this rebinds it.

fig4 = character_appearances()
show_fig_or_image(fig4)  # interactive figure or saved image, per show_interactive_fig
In [22]:
# ## 5. Event Significance Levels
# Breakdown of the events by significance level within each episode.
# NOTE: `fig5` was already assigned by the location-appearances cell; this rebinds it.

fig5 = event_significance()
show_fig_or_image(fig5)  # interactive figure or saved image, per show_interactive_fig
In [23]:
# ## 6. Memorable Quotes Analysis
# Which characters have the most memorable quotes, with some examples.
# NOTE: `fig6` was already assigned by the network-by-episode cell; this rebinds it.

fig6 = quote_analysis()
show_fig_or_image(fig6)  # interactive figure or saved image, per show_interactive_fig
In [24]:
# ## 7. Battle Analysis
# Comparison of casualties across the major battles in the trilogy.
# NOTE: `fig7` was already assigned by the affiliation cell; this rebinds it.

fig7 = battle_analysis()
show_fig_or_image(fig7)  # interactive figure or saved image, per show_interactive_fig
In [25]:
# ## 8. Force User Analysis
# Comparison of Jedi and Sith characters and their relative power levels.
# NOTE: `fig8` was already assigned by the quote-by-character cell; this rebinds it.

fig8 = force_user_analysis()
show_fig_or_image(fig8)  # interactive figure or saved image, per show_interactive_fig
In [26]:
# ## 9. Ship Analysis
# Appearances and classes of the starships in the trilogy.
# NOTE: the name `fig9` is assigned again by a later cell of this notebook.

fig9 = ship_analysis()
show_fig_or_image(fig9)  # interactive figure or saved image, per show_interactive_fig
In [27]:
# ## 10. Relationship Map
# Network view mapping the various relationships between characters.
# NOTE: the name `fig10` is assigned again by a later cell of this notebook.

fig10 = relationship_map()
show_fig_or_image(fig10)  # interactive figure or saved image, per show_interactive_fig
In [28]:
# ## 11. Faction Distribution
# How the characters are distributed across the different factions.

fig11 = faction_distribution()
show_fig_or_image(fig11)  # interactive figure or saved image, per show_interactive_fig

7. Character Affiliation Analysis¶

In [29]:
# Balance of power between the different factions.
# NOTE: this cell repeats the character_affiliation_analysis(data) call made
# earlier in the notebook and rebinds `fig7` once more.

fig7 = character_affiliation_analysis(data)
show_fig_or_image(fig7)  # interactive figure or saved image, per show_interactive_fig

8. Quote Analysis by Character¶

In [30]:
# Which characters have the most memorable quotes.
# NOTE: this cell repeats the quote_analysis_by_character(data) call made
# earlier in the notebook and rebinds `fig8` once more.

fig8 = quote_analysis_by_character(data)
show_fig_or_image(fig8)  # interactive figure or saved image, per show_interactive_fig

9. Relationship Types Analysis¶

In [31]:
# The different types of relationships that exist between characters.
# NOTE: `fig9` was already assigned by the ship-analysis cell; this rebinds it.

fig9 = relationship_types_analysis(data)
show_fig_or_image(fig9)  # interactive figure or saved image, per show_interactive_fig

10. Battle Analysis Through the Trilogy¶

In [32]:
# Scale and outcomes of the battles, compared across the three films.
# NOTE: `fig10` was already assigned by the relationship-map cell; this rebinds it.

fig10 = battle_analysis_through_trilogy(data)
show_fig_or_image(fig10)  # interactive figure or saved image, per show_interactive_fig
In [33]:
# Summary of findings:
# 
# - The timeline shows how events build across the three movies
# - Character networks reveal Luke, Leia, Han, and Vader as the most connected characters
# - The Death Star, Tatooine, and the Millennium Falcon are the most frequently visited locations
# - Events in Episode V and VI tend to have higher significance than Episode IV
# - Most major battles result in significant casualties, especially the Battle of Endor
# - The Skywalker family relationships form the core narrative of the trilogy
# - The Rebel Alliance and Galactic Empire are the dominant factions
# 
In [ ]: